setwd("~/Dropbox/clado-manuscript/Mikes_MS_Data/")
library(ggplot2)
library(plyr)
library(dplyr)
library(Rmisc)
library(DESeq2)
library(doParallel)
#source('http://bioconductor.org/biocLite.R')
#biocLite('phyloseq')
library(phyloseq)
# Load biom file. 
biom <- import_biom("OTU_table.biom", "~/Dropbox/clado-manuscript/Nephele/PipelineResults_NMEPINZ20QK1/nephele_outputs/tree.tre", parseFunction=parse_taxonomy_greengenes)
sam.data <- read.csv(file="sample.data.csv", row.names=1, header=TRUE)
head(sam.data)
##        TreatmentGroup  Site Date                       Description
## C172N1          Early North  172 Sample of day 172 at site North 1
## C172N2          Early North  172 Sample of day 172 at site North 2
## C172N3          Early North  172 Sample of day 172 at site North 3
## C172P1          Early Point  172 Sample of day 172 at site Point 1
## C172P2          Early Point  172 Sample of day 172 at site Point 2
## C172P3          Early Point  172 Sample of day 172 at site Point 3
##        SampleID.1
## C172N1     C172N1
## C172N2     C172N2
## C172N3     C172N3
## C172P1     C172P1
## C172P2     C172P2
## C172P3     C172P3
sam.data$Date <- as.factor(sam.data$Date)
sam.data$DateSite <- paste(sam.data$Date, sam.data$Site)
head(sam.data); str(sam.data)
##        TreatmentGroup  Site Date                       Description
## C172N1          Early North  172 Sample of day 172 at site North 1
## C172N2          Early North  172 Sample of day 172 at site North 2
## C172N3          Early North  172 Sample of day 172 at site North 3
## C172P1          Early Point  172 Sample of day 172 at site Point 1
## C172P2          Early Point  172 Sample of day 172 at site Point 2
## C172P3          Early Point  172 Sample of day 172 at site Point 3
##        SampleID.1  DateSite
## C172N1     C172N1 172 North
## C172N2     C172N2 172 North
## C172N3     C172N3 172 North
## C172P1     C172P1 172 Point
## C172P2     C172P2 172 Point
## C172P3     C172P3 172 Point
## 'data.frame':    52 obs. of  6 variables:
##  $ TreatmentGroup: Factor w/ 2 levels "Early","Late": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Site          : Factor w/ 3 levels "North","Point",..: 1 1 1 2 2 2 3 3 3 1 ...
##  $ Date          : Factor w/ 6 levels "172","178","185",..: 1 1 1 1 1 1 1 1 1 2 ...
##  $ Description   : Factor w/ 52 levels "Sample of day 172 at site North 1",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ SampleID.1    : Factor w/ 52 levels "C172N1","C172N2",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ DateSite      : chr  "172 North" "172 North" "172 North" "172 Point" ...
sample_data(biom) <- sam.data
biom; sample_data(biom)
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 51928 taxa and 52 samples ]
## sample_data() Sample Data:       [ 52 samples by 6 sample variables ]
## tax_table()   Taxonomy Table:    [ 51928 taxa by 8 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 51928 tips and 51914 internal nodes ]
## Sample Data:        [52 samples by 6 sample variables]:
##        TreatmentGroup  Site Date                       Description
## C178N1          Early North  178 Sample of day 178 at site North 1
## C178P1          Early Point  178 Sample of day 178 at site Point 1
## C185P2          Early Point  185 Sample of day 185 at site Point 2
## C206N2           Late North  206 Sample of day 206 at site North 2
## C206P1           Late Point  206 Sample of day 206 at site Point 1
## C206P2           Late Point  206 Sample of day 206 at site Point 2
## C214P1           Late Point  214 Sample of day 214 at site Point 1
## C214P2           Late Point  214 Sample of day 214 at site Point 2
## C214P3           Late Point  214 Sample of day 214 at site Point 3
## C214S1           Late South  214 Sample of day 214 at site South 1
## C214S2           Late South  214 Sample of day 214 at site South 2
## C214S3           Late South  214 Sample of day 214 at site South 3
## C185P1          Early Point  185 Sample of day 185 at site Point 1
## C185P3          Early Point  185 Sample of day 185 at site Point 3
## C199P3           Late Point  199 Sample of day 199 at site Point 3
## C199S2           Late South  199 Sample of day 199 at site South 2
## C199S3           Late South  199 Sample of day 199 at site South 3
## C206N1           Late North  206 Sample of day 206 at site North 1
## C178P2          Early Point  178 Sample of day 178 at site Point 2
## C199N3           Late North  199 Sample of day 199 at site North 3
## C206S1           Late South  206 Sample of day 206 at site South 1
## C214N3           Late North  214 Sample of day 214 at site North 3
## C199N2           Late North  199 Sample of day 199 at site North 2
## C206N3           Late North  206 Sample of day 206 at site North 3
## C206S2           Late South  206 Sample of day 206 at site South 2
## C199N1           Late North  199 Sample of day 199 at site North 1
## C199P1           Late Point  199 Sample of day 199 at site Point 1
## C199S1           Late South  199 Sample of day 199 at site South 1
## C214N1           Late North  214 Sample of day 214 at site North 1
## C172P1          Early Point  172 Sample of day 172 at site Point 1
## C199P2           Late Point  199 Sample of day 199 at site Point 2
## C172N3          Early North  172 Sample of day 172 at site North 3
## C172S3          Early South  172 Sample of day 172 at site South 3
## C178S2          Early South  178 Sample of day 178 at site South 2
## C178P3          Early Point  178 Sample of day 178 at site Point 3
## C178S3          Early South  178 Sample of day 178 at site South 3
## C172N1          Early North  172 Sample of day 172 at site North 1
## C172S1          Early South  172 Sample of day 172 at site South 1
## C178N3          Early North  178 Sample of day 178 at site North 3
## C185N2          Early North  185 Sample of day 185 at site North 2
## C185N3          Early North  185 Sample of day 185 at site North 3
## C185S3          Early South  185 Sample of day 185 at site South 3
## C214N2           Late North  214 Sample of day 214 at site North 2
## C172P2          Early Point  172 Sample of day 172 at site Point 2
## C185S2          Early South  185 Sample of day 185 at site South 2
## C172P3          Early Point  172 Sample of day 172 at site Point 3
## C185N1          Early North  185 Sample of day 185 at site North 1
## C172N2          Early North  172 Sample of day 172 at site North 2
## C178S1          Early South  178 Sample of day 178 at site South 1
## C185S1          Early South  185 Sample of day 185 at site South 1
## C172S2          Early South  172 Sample of day 172 at site South 2
## C178N2          Early North  178 Sample of day 178 at site North 2
##        SampleID.1  DateSite
## C178N1     C178N1 178 North
## C178P1     C178P1 178 Point
## C185P2     C185P2 185 Point
## C206N2     C206N2 206 North
## C206P1     C206P1 206 Point
## C206P2     C206P2 206 Point
## C214P1     C214P1 214 Point
## C214P2     C214P2 214 Point
## C214P3     C214P3 214 Point
## C214S1     C214S1 214 South
## C214S2     C214S2 214 South
## C214S3     C214S3 214 South
## C185P1     C185P1 185 Point
## C185P3     C185P3 185 Point
## C199P3     C199P3 199 Point
## C199S2     C199S2 199 South
## C199S3     C199S3 199 South
## C206N1     C206N1 206 North
## C178P2     C178P2 178 Point
## C199N3     C199N3 199 North
## C206S1     C206S1 206 South
## C214N3     C214N3 214 North
## C199N2     C199N2 199 North
## C206N3     C206N3 206 North
## C206S2     C206S2 206 South
## C199N1     C199N1 199 North
## C199P1     C199P1 199 Point
## C199S1     C199S1 199 South
## C214N1     C214N1 214 North
## C172P1     C172P1 172 Point
## C199P2     C199P2 199 Point
## C172N3     C172N3 172 North
## C172S3     C172S3 172 South
## C178S2     C178S2 178 South
## C178P3     C178P3 178 Point
## C178S3     C178S3 178 South
## C172N1     C172N1 172 North
## C172S1     C172S1 172 South
## C178N3     C178N3 178 North
## C185N2     C185N2 185 North
## C185N3     C185N3 185 North
## C185S3     C185S3 185 South
## C214N2     C214N2 214 North
## C172P2     C172P2 172 Point
## C185S2     C185S2 185 South
## C172P3     C172P3 172 Point
## C185N1     C185N1 185 North
## C172N2     C172N2 172 North
## C178S1     C178S1 178 South
## C185S1     C185S1 185 South
## C172S2     C172S2 172 South
## C178N2     C178N2 178 North
head(otu_table(biom))
## OTU Table:          [6 taxa and 52 samples]
##                      taxa are rows
##                                C178N1 C178P1 C185P2 C206N2 C206P1 C206P2
## New.CleanUp.ReferenceOTU155901      0      0      0      0      0      1
## New.CleanUp.ReferenceOTU321320      0      0      0      0      0      0
## KC551585.1.1451                     2      0      0      6      0      4
## JQ945994.1.1399                     0      0      0      0      0      0
## EF653577.1.1339                     0      0      0      0      0      0
## JQ814729.1.1495                   137      8      1     73      8      6
##                                C214P1 C214P2 C214P3 C214S1 C214S2 C214S3
## New.CleanUp.ReferenceOTU155901      0      0      1      0      1      0
## New.CleanUp.ReferenceOTU321320      0      0      0      0      0      0
## KC551585.1.1451                     9      4     11      2      1      0
## JQ945994.1.1399                     1      0      0      1      0      0
## EF653577.1.1339                     0      0      0      4      0      0
## JQ814729.1.1495                    11      4     27     40      8     23
##                                C185P1 C185P3 C199P3 C199S2 C199S3 C206N1
## New.CleanUp.ReferenceOTU155901      0      0      0      0      0      0
## New.CleanUp.ReferenceOTU321320      0      1      0      0      0      0
## KC551585.1.1451                     3      2      5      0      0     11
## JQ945994.1.1399                     0      0      0      0      0      0
## EF653577.1.1339                     0      0      0      0      0      0
## JQ814729.1.1495                     6      4      2     11      2     56
##                                C178P2 C199N3 C206S1 C214N3 C199N2 C206N3
## New.CleanUp.ReferenceOTU155901      0      0      0      0      0      0
## New.CleanUp.ReferenceOTU321320      0      0      0      0      0      0
## KC551585.1.1451                     0      1      4      3      0      3
## JQ945994.1.1399                     0      0      2      0      0      0
## EF653577.1.1339                     0      0      0      0      0      0
## JQ814729.1.1495                    10     23      6     19      5    849
##                                C206S2 C199N1 C199P1 C199S1 C214N1 C172P1
## New.CleanUp.ReferenceOTU155901      0      0      0      0      0      0
## New.CleanUp.ReferenceOTU321320      0      0      0      0      0      0
## KC551585.1.1451                     1      0      8      2      5      0
## JQ945994.1.1399                     0      0      0      0      0      0
## EF653577.1.1339                     0      0      0      0      0      0
## JQ814729.1.1495                    14     24      5      5      3      2
##                                C199P2 C172N3 C172S3 C178S2 C178P3 C178S3
## New.CleanUp.ReferenceOTU155901      0      0      0      0      0      0
## New.CleanUp.ReferenceOTU321320      0      0      0      0      0      0
## KC551585.1.1451                     2      5      8      0      1      0
## JQ945994.1.1399                     0      0      0      0      0      0
## EF653577.1.1339                     0      0      0      0      0      0
## JQ814729.1.1495                     2    121     31      0      0      5
##                                C172N1 C172S1 C178N3 C185N2 C185N3 C185S3
## New.CleanUp.ReferenceOTU155901      0      0      0      0      0      0
## New.CleanUp.ReferenceOTU321320      0      0      0      0      0      0
## KC551585.1.1451                     0      0      1      0      0      0
## JQ945994.1.1399                     0      0      0      0      0      0
## EF653577.1.1339                     0      0      0      0      0      0
## JQ814729.1.1495                     5     16      4      6     27      4
##                                C214N2 C172P2 C185S2 C172P3 C185N1 C172N2
## New.CleanUp.ReferenceOTU155901      0      0      0      0      0      0
## New.CleanUp.ReferenceOTU321320      0      0      1      0      0      0
## KC551585.1.1451                     3      2      0      2      1      6
## JQ945994.1.1399                     0      0      0      0      0      0
## EF653577.1.1339                     0      0      0      0      0      0
## JQ814729.1.1495                     4     13      1      3     22     21
##                                C178S1 C185S1 C172S2 C178N2
## New.CleanUp.ReferenceOTU155901      0      0      0      0
## New.CleanUp.ReferenceOTU321320      0      0      0      0
## KC551585.1.1451                     0      1      0      3
## JQ945994.1.1399                     0      0      3      0
## EF653577.1.1339                     0      0      0      0
## JQ814729.1.1495                     2      3     21     18
# Custom plotting. 
nolegend <- theme(legend.position="none")
readabund <- labs(y="read abundance")
# Normalize by relative abundance. 
biom.relabund <- transform_sample_counts(biom, function(x) x / sum(x))
# Make a list of all genera identified. 
genera.relabund <- sort(get_taxa_unique(biom.relabund, "Genus"), decreasing=FALSE)
write(genera.relabund, file = "genera-relabund.txt")
# Find all genera
all.genera <- read.table(file = "genera-relabund.txt")
all.genera <- as.vector(all.genera$V1)
# 
biom.relabund.all.genera <- subset_taxa(biom.relabund, Genus %in% as.factor(all.genera))
biom.relabund.all.genera
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 11769 taxa and 52 samples ]
## sample_data() Sample Data:       [ 52 samples by 6 sample variables ]
## tax_table()   Taxonomy Table:    [ 11769 taxa by 8 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 11769 tips and 11763 internal nodes ]
biom.relabund.all.genera <- psmelt(biom.relabund.all.genera)
biom.relabund.all.genera.genus <- biom.relabund.all.genera%>%
  group_by(Sample, Genus)%>%
  mutate(GenusAbundance = sum(Abundance))%>%
  distinct(Sample, GenusAbundance, TreatmentGroup, Site, Date, Family, Genus)
head(biom.relabund.all.genera.genus)
## Source: local data frame [6 x 9]
## Groups: Sample, Genus [6]
## 
##   Sample GenusAbundance TreatmentGroup   Site   Date          Family
##    <chr>          <dbl>         <fctr> <fctr> <fctr>          <fctr>
## 1 C178S2     0.14728018          Early  South    178 Crenotrichaceae
## 2 C206N2     0.07541493           Late  North    206      Thermaceae
## 3 C199S1     0.12408425           Late  South    199 Crenotrichaceae
## 4 C185S1     0.12728636          Early  South    185 Crenotrichaceae
## 5 C199S3     0.10486769           Late  South    199 Crenotrichaceae
## 6 C172S1     0.10894857          Early  South    172   Cytophagaceae
## # ... with 3 more variables: Genus <fctr>, Sample <chr>, Genus <fctr>
# Not working: 
# p <- ggplot(biom.relabund.all.genera.genus, aes(as.factor(Date), GenusAbundance, color = Site))
# p <- p + geom_point() + facet_wrap(~Genus, scales="free_y")
# p
biom.relabund.all.genera.genus.est <- summarySE(biom.relabund.all.genera.genus, measurevar="GenusAbundance", groupvars=c("Site","Date", "Genus"))
head(biom.relabund.all.genera.genus.est)
##    Site Date           Genus N GenusAbundance           sd           se
## 1 North  172             A17 3   1.044934e-05 7.240333e-06 4.180208e-06
## 2 North  172   Achromobacter 3   1.205785e-06 2.088482e-06 1.205785e-06
## 3 North  172 Acidaminobacter 3   2.030206e-05 3.516421e-05 2.030206e-05
## 4 North  172      Acidocella 3   0.000000e+00 0.000000e+00 0.000000e+00
## 5 North  172      Acidovorax 3   4.023211e-05 3.508560e-05 2.025668e-05
## 6 North  172   Acinetobacter 3   1.395043e-04 1.803168e-04 1.041059e-04
##             ci
## 1 1.798598e-05
## 2 5.188076e-06
## 3 8.735273e-05
## 4 0.000000e+00
## 5 8.715747e-05
## 6 4.479317e-04
biom.relabund.all.genera.genus.est$Date <- as.character(biom.relabund.all.genera.genus.est$Date)
biom.relabund.all.genera.genus.est$Date <- as.numeric(biom.relabund.all.genera.genus.est$Date)
p <- ggplot(biom.relabund.all.genera.genus.est, aes(x=Date, y=GenusAbundance)) + geom_point() +  geom_errorbar(aes(ymin=GenusAbundance-se, ymax=GenusAbundance+se)) +  geom_smooth(method="lm", se=TRUE) + facet_wrap(~Genus~Site, ncol = 3, scales="free_y")
p + theme_bw() + theme(axis.text.x = element_text(size = 10, angle = 45, hjust=1),axis.text.y = element_text(size = 10))